Approach: Major cell types (level 3) vs Mostafavi vs MF
Previous script: 01_prepare_input.R
# Reference: Dataset 01
# Test: Dataset 02
# Rationale: Are the modules from the **Reference** networks preserved in the **Test** data set?
### SET PARAMETERS
snakemake_sn_input_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
snakemake_sn_output_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/"
cluster_lv = "3"
numthreads = 2
cell_ids = c("ast", "mic", "oli", "end", "opc", "ext", "inh", "Sara", "bulk_MF")
nSets = length(cell_ids)# Heavy chunk. Calculate pairwise module preservation stats
multiExpr = list()
multiColor = list()
for (set in 1:nSets) {
message(paste("Working on set:", cell_ids[set]))
# Residuals of the expression:
res_dataset_01 = read.table(paste0(snakemake_sn_input_dir, cell_ids[set], ".txt"), header = T, check.names = F, stringsAsFactors = F)
res_dataset_01 = as.data.frame(t(res_dataset_01))
# clusters from SpeakEasy:
k_dataset_01 = read.table(paste0(snakemake_sn_output_dir, cell_ids[set], "/geneBycluster.txt"), header = T, stringsAsFactors = F)
k_dataset_01$ensembl = gsub("(.*)\\.(.*)","\\1",k_dataset_01$ensembl)
# Match and sort genes in Reference modules
k_dataset_01.match = k_dataset_01 %>% filter(ensembl %in% colnames(res_dataset_01))
k_dataset_01.match = k_dataset_01.match[match(colnames(res_dataset_01), k_dataset_01.match$ensembl),]
multiExpr[[set]] = list(data = res_dataset_01)
multiColor[[set]] = paste0("M", k_dataset_01.match[,paste0("cluster_lv",cluster_lv)])
}
names(multiExpr) = cell_ids
names(multiColor) = cell_ids
# Here comes the calculation of module preservation, it takes a week for the SN dataset
enableWGCNAThreads(nThreads = numthreads)
system.time( {
mp = modulePreservation(multiExpr, multiColor,
dataIsExpr = T,
networkType = "unsigned", # default: unsigned
referenceNetworks = c(1:nSets),
maxModuleSize = 20000,
maxGoldModuleSize = 500,
nPermutations = 200, # Default = 200
quickCor = 1,
randomSeed = 2022,
parallelCalculation = T,
verbose = 4)
} )
# Save the results
save(mp, file = paste0(work_dir, "mp_sn_mf_sara.RData"))load(file = paste0(work_dir, "mp_sn_mf_sara.RData"))
cell_ids = c("ast", "mic", "oli", "end", "opc", "ext", "inh", "Sara", "bulk_MF")
nSets = length(cell_ids)
res_all = data.frame()
for (ref in 1:nSets) {
for (test in 1:nSets) {
if(ref!=test){
statsObs = cbind(mp$quality$observed[[ref]][[test]][,-1], mp$preservation$observed[[ref]][[test]][,-1])
statsObs$module_id = rownames(statsObs)
statsZ = cbind(mp$quality$Z[[ref]][[test]], mp$preservation$Z[[ref]][[test]][,-1])
statsZ$module_id = rownames(statsZ)
res_df = statsObs %>% dplyr::left_join(statsZ, by = c("module_id"))
res_df$ref = cell_ids[ref]
res_df$test = cell_ids[test]
rownames(res_df) = NULL
#res_df2 = cbind(statsObs[, c("medianRank.pres", "medianRank.qual")], signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2))
#res_df2$ref = cell_ids[ref]
#res_df2$test = cell_ids[test]
res_all = bind_rows(res_all,res_df)
}
}
}
res_final = res_all[,c("module_id","moduleSize","ref","test",colnames(res_all)[!colnames(res_all) %in% c("module_id","moduleSize","ref","test")])]
createDT(res_final)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
p <- ggplot(res_final %>% filter(moduleSize >= 30), aes(x = moduleSize, y = Zsummary.pres )) +
geom_hline(yintercept = c(2,10), linetype = "dashed") +
geom_point(shape = 21) +
geom_label_repel(aes(label = module_id), max.overlaps = 25) +
# geom_label(aes(label = module_id)) +
labs(y = "Preservation Zsummary", x = "Module size", title = "Preservation Zsummary") +
theme_classic() +
facet_wrap(~ref+test, scales = "free", ncol = 7)
# pdf(file = paste0(work_dir, "h_fisher_mp_majorcells_sn.pdf"), width = 30, height = 40)
# p
# dev.off()
p#res_final %>% select(module_id,ref,test,Zsummary.pres) %>% mutate(id = paste(ref,module_id,sep = "_")) %>%
# pivot_wider(id_cols = id, names_from = test, values_from = Zsummary.pres)
res_mtx_sum = res_final %>% filter(moduleSize >= 30) %>% select(module_id,ref,test,Zsummary.pres) %>% mutate(id = paste(ref,module_id,sep = "_")) %>%
pivot_wider(id_cols = ref, names_from = test, values_from = Zsummary.pres, values_fn = function(x) sum(x<2, na.rm = T)) %>% column_to_rownames("ref")
res_mtx_sum = res_mtx_sum[,rownames(res_mtx_sum)]
suppressPackageStartupMessages(library(ComplexHeatmap))
library(leaflet)
my.breaks <- c(seq(0, max(res_mtx_sum, na.rm = T), by=1))
my.colors <- colorBin("Spectral", bins = my.breaks, na.color = "#aaff56", reverse = T, pretty = T)
draw_colnames_45 <- function (coln, gaps, ...) {
coord <- pheatmap:::find_coordinates(length(coln), gaps)
x <- coord$coord - 0.5 * coord$size
res <- grid::textGrob(
coln, x = x, y = unit(1, "npc") - unit(3,"bigpts"),
vjust = 0.75, hjust = 1, rot = 90, gp = grid::gpar(...)
)
return(res)
}
assignInNamespace(
x = "draw_colnames",
value = "draw_colnames_45",
ns = asNamespace("pheatmap")
)
(p_heat = Heatmap(res_mtx_sum,
cluster_columns = F, cluster_rows = F,
heatmap_legend_param = list(title = "# modules Zsum<2"),
row_names_gp = gpar(fontsize = 10), column_names_gp = gpar(fontsize = 10),
column_title = "Test set",
row_title = "Reference set",
col = my.colors(my.breaks),
rect_gp = gpar(col = "gray", lwd = .8),
cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
grid.text(sprintf("%d", res_mtx_sum[i,j]), x, y, gp = gpar(fontsize = 8))
}
))sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] leaflet_2.1.1 ComplexHeatmap_2.11.1
## [3] SummarizedExperiment_1.24.0 Biobase_2.54.0
## [5] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1
## [7] IRanges_2.28.0 S4Vectors_0.32.3
## [9] BiocGenerics_0.40.0 MatrixGenerics_1.6.0
## [11] matrixStats_0.61.0 ggeasy_0.1.3
## [13] ggpubr_0.4.0 ggrepel_0.9.1
## [15] WGCNA_1.70-3 fastcluster_1.2.3
## [17] dynamicTreeCut_1.63-1 forcats_0.5.1
## [19] stringr_1.4.0 dplyr_1.0.8
## [21] purrr_0.3.4 readr_2.1.2
## [23] tidyr_1.2.0 tibble_3.2.1
## [25] ggplot2_3.4.1 tidyverse_1.3.1
## [27] limma_3.50.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.1 circlize_0.4.14
## [4] Hmisc_4.6-0 splines_4.1.2 crosstalk_1.2.0
## [7] digest_0.6.29 foreach_1.5.2 htmltools_0.5.2
## [10] GO.db_3.14.0 fansi_1.0.4 magrittr_2.0.3
## [13] checkmate_2.0.0 memoise_2.0.1 cluster_2.1.2
## [16] doParallel_1.0.17 tzdb_0.2.0 Biostrings_2.62.0
## [19] modelr_0.1.8 jpeg_0.1-9 colorspace_2.0-3
## [22] blob_1.2.2 rvest_1.0.2 haven_2.4.3
## [25] xfun_0.29 crayon_1.5.0 RCurl_1.98-1.6
## [28] jsonlite_1.7.3 impute_1.68.0 survival_3.2-13
## [31] iterators_1.0.14 glue_1.6.2 gtable_0.3.0
## [34] zlibbioc_1.40.0 XVector_0.34.0 GetoptLong_1.0.5
## [37] DelayedArray_0.20.0 car_3.0-12 shape_1.4.6
## [40] abind_1.4-5 scales_1.2.1 pheatmap_1.0.12
## [43] DBI_1.1.2 rstatix_0.7.0 Rcpp_1.0.8
## [46] htmlTable_2.4.0 clue_0.3-60 foreign_0.8-81
## [49] bit_4.0.4 preprocessCore_1.56.0 Formula_1.2-4
## [52] DT_0.20 htmlwidgets_1.5.4 httr_1.4.2
## [55] RColorBrewer_1.1-2 ellipsis_0.3.2 pkgconfig_2.0.3
## [58] farver_2.1.0 nnet_7.3-16 sass_0.4.0
## [61] dbplyr_2.1.1 utf8_1.2.3 tidyselect_1.1.2
## [64] labeling_0.4.2 rlang_1.1.1 AnnotationDbi_1.56.2
## [67] munsell_0.5.0 cellranger_1.1.0 tools_4.1.2
## [70] cachem_1.0.6 cli_3.6.1 generics_0.1.2
## [73] RSQLite_2.2.10 broom_0.7.12 evaluate_0.15
## [76] fastmap_1.1.0 yaml_2.3.5 knitr_1.37
## [79] bit64_4.0.5 fs_1.5.2 KEGGREST_1.34.0
## [82] xml2_1.3.3 compiler_4.1.2 rstudioapi_0.13
## [85] png_0.1-7 ggsignif_0.6.3 reprex_2.0.1
## [88] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [91] lattice_0.20-45 Matrix_1.5-1 vctrs_0.6.3
## [94] pillar_1.9.0 lifecycle_1.0.3 jquerylib_0.1.4
## [97] GlobalOptions_0.1.2 data.table_1.14.2 bitops_1.0-7
## [100] R6_2.5.1 latticeExtra_0.6-29 gridExtra_2.3
## [103] codetools_0.2-18 assertthat_0.2.1 rjson_0.2.21
## [106] withr_2.5.0 GenomeInfoDbData_1.2.7 parallel_4.1.2
## [109] hms_1.1.1 rpart_4.1-15 rmarkdown_2.11
## [112] carData_3.0-5 lubridate_1.8.0 base64enc_0.1-3